Analyse GP Surgery Accessibility in Walthamstow

Locations Provided by Open Street Map Data
Author

Rich Leyshon

Published

February 27, 2024

Click to show code
from datetime import datetime, timedelta
import glob
import os
from pathlib import Path
import subprocess
from time import sleep
import toml

import contextily as ctx
import folium
import geopandas as gpd
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderUnavailable
from geopy import point
import matplotlib.pyplot as plt
import pandas as pd
from pyprojroot import here
import r5py
import requests
from shapely import geometry

from transport_performance.osm.osm_utils import filter_osm

from travel_time_experiments.ingest_ons_geo import get_ons_geo_data, get_ons_geo_paginated

# get private user agent
USER_AGENT = toml.load(here(".secrets.toml"))["nominatim"]["USER_AGENT"]

London OSM doesn’t include Loughton & Cheshunt. To avoid edge effects, ingest England OSM latest & filter to custom bounding box.

Click to show code
osm_pth = here("data/external/osm/england-latest-osm.pbf")
if not os.path.exists(osm_pth):
    subprocess.run(
        [
            "curl",
            "https://download.geofabrik.de/europe/united-kingdom/england-latest.osm.pbf",
            "-o",
            osm_pth
            ])
Click to show code
filtered_osm_pth = here("data/external/osm/walthamstow-aoi-osm.pbf")
BBOX = [-0.368022,51.432659,0.296356,51.765395]
if not os.path.exists(filtered_osm_pth):
    filter_osm(
        pbf_pth=osm_pth, out_pth=filtered_osm_pth, bbox=BBOX, tag_filter=False)

From NHS Service Search , surgeries within 50 miles serving Hoe Street, Wolthamstow. Postcodes provided and will need to be geocoded. Investigate whether coordinates are available in NHS developer portal, registration required.

Click to show code
surg_pth = here("data/external/features/nhs-surgeries-E17-3AX.csv")
if not os.path.exists(surg_pth):
    subprocess.run([
        "curl",
        "https://www.nhs.uk/service-search/other-services/GP/E17-3AX/Export/4/-0.0193444043397903/51.5837821960449/4/0?distance=50&ResultsOnPageValue=10&isNational=0&totalItems=2845&currentPage=1",
        "-o",
        surg_pth
    ])

surgeries = pd.read_csv(surg_pth)
surgeries = surgeries.loc[:, ["Organisation Name", "PostCode"]]
surgeries["id"] = list(range(0, len(surgeries)))
Click to show code
def get_surgery_locs_nominatim(
    df: pd.DataFrame, out_pth: Path, sleep_s:float=2.0, user: str = USER_AGENT
    ) -> None:
    """Use orgnm and then postcode to attempt geolocating surgeries. 
    
    Uses Nominatim. Writes to file if error encountered. 
    """
    tmp_df = df.copy(deep=True)
    # Instantiate a new Nominatim client
    app = Nominatim(
        user_agent="")
    tmp_df["lat"] = 0.0
    tmp_df["lon"] = 0.0
    tmp_df["geocode_type"] = "None"
    geocode_type = []
    for i, row in tmp_df.iterrows():
        loc = {"lon": 0.0, "lat": 0.0}
        geo_type = "None"
        try:
            loc = app.geocode(
                query=row["Organisation Name"],
                country_codes="gb",
                viewbox=[
                    point.Point(51.375, -0.509), point.Point(51.868, 0.530)],
                bounded=True)
            print("Geocode by Org Name success")
            sleep(sleep_s)
            geo_type = "from_nm"
            if not loc:
                # case where name did not return a location
                loc = app.geocode(
                    query=row["PostCode"],
                    country_codes="gb",
                    viewbox=[
                        point.Point(51.375, -0.509), point.Point(51.868, 0.530)
                        ],
                    bounded=True)
                print("Geocode by Postcode success")
                sleep(sleep_s)
                geo_type = "from_pcd"
        except GeocoderUnavailable:
            # nominatim is down
            print(f"Breaking on {row['Organisation Name']}")
            break
        finally:
            # update the geometry column
            if loc:
                loc = loc.raw
                tmp_df.loc[i, ["lat"]] = loc["lat"]
                tmp_df.loc[i, ["lon"]] = loc["lon"]
                tmp_df.loc[i, ["geocode_type"]] = geo_type
            else:
                break
    tmp_df.to_csv(out_pth)
    return tmp_df
Click to show code
# execute once only as hammers the nominatim service
geocoded_surgeries_pth = here(
    "data/external/features/geocoded-london-surgeries.csv")
if not os.path.exists(geocoded_surgeries_pth):
    get_surgery_locs_nominatim(df=surgeries, out_pth=cache_pth, sleep_s=2.0)

Present the geocoded surgeries.

Click to show code
geocd_pth = here("data/external/features/").glob("geocode-surgeries-*")
geocd_surgeries = pd.read_csv(list(geocd_pth)[0], index_col=0)
geocd_surgeries = geocd_surgeries.dropna()
geocd_surgeries = gpd.GeoDataFrame(
    geocd_surgeries,
    geometry=gpd.points_from_xy(
        geocd_surgeries["lon"], geocd_surgeries["lat"]), crs=4326)
geocd_surgeries.drop(["lat", "lon"], axis=1, inplace=True)
geocd_surgeries.explore()
Make this Notebook Trusted to load map: File -> Trust Notebook

Note that the accuracy of surgery location varies dependent upon whether the surgeries were geolocated from the organisation name or the postcode. Most are from postcode. Accuracy of these points will depend on size of postcode.

Click to show code
code_stats = geocd_surgeries["geocode_type"].describe()
print(
    f"{round((code_stats.freq / len(geocd_surgeries)) * 100, 1)} % of "
    f"{len(geocd_surgeries)} surgeries were geocoded by postcode.")
57.4 % of 535 surgeries were geocoded by postcode.

Ingest Pop-weighted centroids.

Click to show code
ENDPOINT = "https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/Output_Areas_2021_PWC_V3/FeatureServer/0/query"

PARAMS = {
    "where": "OA21CD like 'E%'",
    "f": "geoJSON", 
    "outFields": "*",
    "outSR": 4326,
    "returnCountOnly": True,
}
resp = requests.get(ENDPOINT, PARAMS)
cont = resp.json()
n_centroids = cont['properties']['count']
print(
    f"There are {n_centroids:,} OA Centroids with the specified OA21CD pattern"
    )
There are 178,605 OA Centroids with the specified OA21CD pattern
Click to show code
PARAMS["returnCountOnly"] = False
PARAMS["resultOffset"] = 0
centroids = get_ons_geo_paginated(ENDPOINT, PARAMS)
print(f"All centroids ingested? {len(centroids) == n_centroids}")
All centroids ingested? True

Ingest Wolthamstow LAD boundary

Going with LAD21 to match centroid release, though 23 is available. LADCD for Wolthamstow is E09000031.

Click to show code
ENDPOINT = "https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/LAD_Dec_2021_GB_BFC_2022/FeatureServer/0/query"
PARAMS["where"] = "LAD21CD = 'E09000031'"
del PARAMS["resultOffset"]
PARAMS["outSR"] = 27700 # get in BNG as will need to buffer
_, boundary = get_ons_geo_data(ENDPOINT, PARAMS)
boundary.explore()
Make this Notebook Trusted to load map: File -> Trust Notebook

Buffer the LAD boundary to avoid edge effects - people may prefer surgeries in the adjacent local authority. Buffer by 5km, pretty arbitrary rule of thumb that can be adjusted to suit.

Click to show code
buffered_bound = gpd.GeoDataFrame({"geometry": boundary.buffer(5000)})

imap = boundary.explore(
    map_kwds={
        "center": {"lat": 51.603, "lng": -0.018}
    },
    zoom_start=10)
imap = buffered_bound.explore(
    m=imap,
    style_kwds=dict(
        color="purple", weight=1, fillOpacity=0.3
        ))
imap
Make this Notebook Trusted to load map: File -> Trust Notebook

Use the centroids within the LAD boundary (blue). Use the GP surgeries within the 5km buffer (purple).

Click to show code
# clip centroids to LAD boundary
boundary = boundary.to_crs(4326)
walth_centroids = centroids.sjoin(boundary)
# clip surgeries to buffer
buffered_bound = buffered_bound.to_crs(4326)
proximal_gps = geocd_surgeries.sjoin(buffered_bound)
imap = walth_centroids.explore(map_kwds={
        "center": {"lat": 51.603, "lng": -0.018}
    },
    zoom_start=11.5, color="navy")
imap = proximal_gps.explore(
    m=imap,
    marker_type="marker",
    marker_kwds={
        "icon": folium.map.Icon(
            color="green", icon="briefcase-medical", prefix="fa")
    }
)
imap
Make this Notebook Trusted to load map: File -> Trust Notebook

These are the journey origins (navy) and destinations (green).

Click to show code
tn = r5py.TransportNetwork(filtered_osm_pth)
# journey time with todays date
walth_centroids["id"] = list(range(0, len(walth_centroids)))
proximal_gps["id"] = list(range(0, len(proximal_gps)))
dept_time = datetime.now().replace(hour=8, minute=0, second=0, microsecond=0)

travel_time_matrix = r5py.TravelTimeMatrixComputer(
    tn,
    origins=walth_centroids,
    destinations=proximal_gps,
    transport_modes=[r5py.TransportMode.CAR, r5py.TransportMode.WALK],
    departure=dept_time,
    departure_time_window=timedelta(minutes=10), # Up to 08:10
    snap_to_network=True,
    max_time=timedelta(minutes=30), # 30 minute journey max 
).compute_travel_times()
travel_time_matrix.head()
from_id to_id travel_time
0 0 0 9.0
1 0 1 10.0
2 0 2 10.0
3 0 3 24.0
4 0 4 4.0

Calculate median travel times across surgeries.

Click to show code
med_tts = travel_time_matrix.drop("to_id", axis=1).groupby("from_id").median()
ENDPOINT = "https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/Output_Areas_2021_EW_BFC_V8/FeatureServer/0/query"
PARAMS["where"] = "OA21CD LIKE 'E%'"
PARAMS["resultOffset"] = 0
PARAMS["outSR"] = 4326
oa_polys = get_ons_geo_paginated(ENDPOINT, PARAMS)

waltham_tts = med_tts.join(walth_centroids.set_index("id"))
waltham_tts = waltham_tts.set_index("OA21CD").join(
    oa_polys.set_index("OA21CD"), lsuffix="_l", rsuffix="_r")
waltham_tts = gpd.GeoDataFrame(waltham_tts, geometry="geometry_r", crs=4326)
waltham_tts.head()
travel_time geometry_l FID_l GlobalID_left index_right OBJECTID LAD21CD LAD21NM LAD21NMW BNG_E_l ... LSOA21CD LSOA21NM LSOA21NMW BNG_E_r BNG_N_r LAT_r LONG_r Shape__Area_r Shape__Length_r GlobalID
OA21CD
E00185707 19.0 POINT (-0.03593 51.59139) 153691 7c3d3d98-7388-4561-ab16-342bdfe4ea87 0 307 E09000031 Waltham Forest 537328 ... E01004404 Waltham Forest 014D 536167 189892 51.59143 -0.035700 30049.309052 900.426020 22cc4956-c4b3-4f8a-bdcb-895d1e72f2ec
E00022297 17.0 POINT (-0.01526 51.56533) 153747 d3d84d0f-2c7c-4eb8-b478-1f17414df99b 0 307 E09000031 Waltham Forest 537328 ... E01004435 Waltham Forest 023E 537691 187002 51.56509 -0.014840 31297.801140 869.969390 9751b13d-a37d-424c-a4f1-410aaccc1b69
E00021864 17.0 POINT (-0.01775 51.59764) 153771 0b93ba07-dec2-45e1-b420-743f96ab0722 0 307 E09000031 Waltham Forest 537328 ... E01004347 Waltham Forest 011C 537425 190614 51.59761 -0.017270 15718.446632 910.345782 afff04a5-c07d-4bfb-bde4-fb488b660267
E00021924 18.0 POINT (-0.01807 51.62584) 153776 b9812f37-6505-4c44-8c58-e8e2249d143e 0 307 E09000031 Waltham Forest 537328 ... E01004361 Waltham Forest 005A 537330 193761 51.62591 -0.017410 40936.800621 1150.311663 714b9ffe-d59c-49a3-be4e-613bdc488ad4
E00022022 15.0 POINT (0.00292 51.59698) 153788 5a6e0d77-5d31-4d96-8852-9f62f64d683c 0 307 E09000031 Waltham Forest 537328 ... E01004377 Waltham Forest 010C 539041 190531 51.59647 0.006011 403417.859642 3404.518451 47529802-8f2d-4367-bd4c-96e21871b190

5 rows × 28 columns

Median Travel Time from Centroids to Surgeries

Use the selection widget on the right of the map to toggle layers on and off.

Click to show code
imap = waltham_tts[["geometry_r", "travel_time"]].explore(
    "travel_time",
    cmap="viridis_r",
    tiles="CartoDB positron",
    zoom_start=12,
    name="OA21 polygons")
imap = proximal_gps.explore(
    m = imap,
    marker_type="marker",
    marker_kwds={
        "icon": folium.map.Icon(
            color="green", icon="briefcase-medical", prefix="fa"),
    }, name="destinations"
)
imap = walth_centroids.reset_index()[["geometry", "OA21CD"]].explore(
    m=imap, color="navy", opacity=0.2, name="origins")
imap.save(
    "outputs/walthamstow-surgeries//median_tt_to_surgeries_5km_buffer.html")
folium.LayerControl().add_to(imap)
imap
Make this Notebook Trusted to load map: File -> Trust Notebook

This is an interesting situation - the high density of destinations in this locality could form the basis for a different treatment of proximal surgeries. The median travel time will be from one output area centroid to every GP on the map.

It may not be reasonable to consider all these options for each centroid. You could potentially draw a smaller buffer around each output area instead, and then calculate travel times to a smaller subset of proximal GPs.